
do "E:/ReplicateBuild/02_code/00_environment/00_set_environment.do"

* Run switches: set to 1 to run the corresponding section.
*   basedata  - gates the block that builds the applications dataset
*   dataCheck - gates the school-code consistency assertions inside that block
local basedata = 1
local dataCheck = 1

********************************************************************************
* Read in applications
********************************************************************************

if `basedata' == 1 {
	********************************************************************************
	* Create dataset of all unique applications.
	* Identifier: applicant_id job_id
	********************************************************************************
	
	insheet using "$rawdata/FOCAL/Applied Jobs.csv", clear comma nonames
	* the file is read with nonames (first row treated as data), so the eight
	* columns arrive as v1-v8 and are named here by position
	rename v1 applicant_id
	rename v2 applied_date_str
	rename v3 job_id
	rename v4 job_name
	rename v5 school_code_str
	rename v6 application_outcome_str
	rename v7 job_type_str
	rename v8 job_school

	* convert string dates to numeric
	* every raw date must be exactly 10 characters (presumably MM/DD/YYYY, given the "MDY" mask)
	assert length(applied_date_str)==10
	gen applied_date = date(applied_date_str,"MDY")
	* date() returns missing for strings it cannot parse instead of erroring, and
	* the length check above does not catch e.g. an out-of-range month or day.
	* Missing dates would silently distort the per-job min/max dates and
	* year-of-application variables built later, so stop here instead.
	assert !missing(applied_date)
	format applied_date %d
	drop applied_date_str
	
	* clean up school code: strip hyphens and the letters A, C, L, N, U, X,
	* then convert what is left to a number
	* NOTE(review): a raw value made up only of ignored characters (e.g. "NULL")
	* presumably becomes missing here - confirm
	destring school_code_str, generate(school_code) ignore("-ACLNUX")
	* compare the number of distinct codes before and after stripping characters
	* (unique is a user-written command; this is a visual check of the log, nothing is asserted)
	unique school_code
	unique school_code_str
	drop school_code_str
	
	if `dataCheck' == 1 {
		* each school name must carry a single school code; rows whose name is
		* "NULL" are exempt. egen min()/max() skip missing codes.
		bysort job_school: egen code_lo = min(school_code)
		bysort job_school: egen code_hi = max(school_code)
		assert job_school=="NULL" | code_lo==code_hi
		drop code_lo code_hi

		* each school code must be either always or never paired with a "NULL" name
		gen name_is_null = (job_school=="NULL")
		bysort school_code: egen null_lo = min(name_is_null)
		bysort school_code: egen null_hi = max(name_is_null)
		assert null_lo==null_hi
		drop name_is_null null_lo null_hi
	}
	
	* report how many distinct applicant-by-job pairs exist
	* NOTE(review): this only prints to the log; uniqueness of the key is not enforced
	unique applicant_id job_id
	sort applicant_id job_id

	* encode string variables as numeric to save space
	foreach v in job_type application_outcome {
		encode `v'_str, gen(`v')
		drop `v'_str
	}
	
	*** filter to a more homogeneous sample of positions

		* lower-cased job title, computed once and reused by every keyword test below
		gen job_name_lc = lower(job_name)

		* level of school: elem_title = 1 when the title contains any elementary marker

			gen elem_title = 0
			foreach marker in "4-6" "-5" "elem" "-6" "k-" "3rd" "5th" "4th" {
				replace elem_title = 1 if strpos(job_name_lc,"`marker'")>0
			}

		* base sample: start from every application, then switch off unwanted postings

			gen base_sample = 1
			* NOTE(review): job_type was created by encode, so code 1 is whichever label
			* sorts first alphabetically - confirm it is the instructional category
			replace base_sample = 0 if job_type!=1 // non-instructional jobs
			replace base_sample = 0 if strpos(job_name_lc,"teacher")==0 // non-teachers
			replace base_sample = 0 if elem_title==0 // non-elementary schools

			* drop any posting whose title contains one of these keywords.
			* These are plain substring matches, so short keywords also hit longer
			* words (e.g. "art" matches "kindergarten") - NOTE(review): confirm intended.
			* "interventionist" is already covered by "intervention"; kept for parity.
			foreach kw in ///
				"preschool" "art" "music" "specialized" "math" /// preschool and subject specialists
				"computer" "technology" "literacy" "reading" "physical education" ///
				"end of year" "eoy" /// teachers to fill in the rest of the year
				"spanish" "science" "interim" "chinese" "montessori" ///
				"curriculum" /// special curriculum positions
				"esl" ///
				"lift" /// Project LIFT for at-risk youth
				"assistant" "virtual" ///
				"catalyst" /// catalyst talent developers
				"dance" ///
				"self-contained" /// swd teachers
				"example posting" /// test postings
				"exchange" "intervention" "band" "resource" ///
				"itinerant" /// itinerant coordinating teachers
				"spec behav" /// special behavior teachers
				"language" "orchestra" "talent development" "social studies" ///
				"english learner" /// esl
				"extensions" /// extension teachers
				"immersion" /// language teachers
				"academic support" "temporary" ///
				"continuous" /// continuous learning
				"early end" /// early end date positions
				"interventionist" ///
				"semester" /// part-year
				"substitute" /// long-term substitutes
				"bilingual" "health" {
				replace base_sample = 0 if strpos(job_name_lc,"`kw'")>0
			}

			* ELA is matched case-sensitively against the original title
			replace base_sample = 0 if strpos(job_name,"ELA")>0 // ELA teachers

			drop job_name_lc
			
			
			* Teacher Leader Pathways (aka Project Lift & Success by Design)

			* lower-cased helpers, dropped at the end of this section
			gen tlp_name_lc = lower(job_name)
			gen tlp_school_lc = lower(job_school)
			* 1 when the posting's school name mentions LIFT in either spelling
			gen tlp_school_lift = strpos(tlp_school_lc,"lift")>0 | strpos(tlp_school_lc,"l.i.f.t.")>0

			* tlp = 1 when the school is a LIFT school or the title has any TLP keyword
			gen tlp = tlp_school_lift
			foreach kw in "lift" "l.i.f.t." "multi-classroom" "multiclassroom" "success by design" ///
				"reach" "mcl" "sbd" "extended impact" "expanded impact" {
				replace tlp = 1 if strpos(tlp_name_lc,"`kw'")>0
			}

			* pool postings: TLP titles containing "pool", plus everything at a LIFT school
			gen tlp_pool = (tlp==1 & strpos(tlp_name_lc,"pool")>0) | tlp_school_lift==1
			* all remaining TLP postings are specific jobs
			gen tlp_job = (tlp==1 & tlp_pool==0)

			* position type; assignments run in order, so a later match overrides an earlier one
			gen tlp_pos_type = ""

			replace tlp_pos_type = "LIFT" if tlp_school_lift==1
			foreach kw in "lift" "l.i.f.t." {
				replace tlp_pos_type = "LIFT" if strpos(tlp_name_lc,"`kw'")>0
			}

			foreach kw in "reach team" "reach instructional" "reach teacher" {
				replace tlp_pos_type = "RT" if strpos(tlp_name_lc,"`kw'")>0
			}

			replace tlp_pos_type = "MR" if strpos(tlp_name_lc,"master reach")>0
			replace tlp_pos_type = "SR" if strpos(tlp_name_lc,"senior reach")>0

			foreach kw in "extended impact" "expanded impact" {
				replace tlp_pos_type = "EI" if strpos(tlp_name_lc,"`kw'")>0
			}

			* NOTE(review): the MCL1 and MCL2 keyword lists are not symmetric
			* ("multiclassroom leader 1" vs "multiclassroom 2") - confirm this is intended
			foreach kw in "multi-classroom leader 1" "multiclassroom leader 1" "mcl1" "multi-classroom 1" {
				replace tlp_pos_type = "MCL1" if strpos(tlp_name_lc,"`kw'")>0
			}
			foreach kw in "multi-classroom leader 2" "multiclassroom 2" "mcl2" "multi-classroom 2" {
				replace tlp_pos_type = "MCL2" if strpos(tlp_name_lc,"`kw'")>0
			}

			* TLP jobs that matched no specific type; pool postings carry no type at all
			replace tlp_pos_type = "other" if tlp_job==1 & tlp_pos_type==""
			replace tlp_pos_type = "" if tlp_pool == 1

			drop tlp_name_lc tlp_school_lc tlp_school_lift
			
			
			*Teach for America
			* flagged by title keyword, plus one posting identified directly by its job_id
			gen tfa_app = strpos(lower(job_name),"tfa")>0 ///
				| strpos(lower(job_name),"teach for america")>0 ///
				| job_id == "4600161183"
			
			* keep on-cycle positions and positions with most applications in the first year

			* first and last application date observed for each posting
			bys job_id: egen earliest_app = min(applied_date)
			bys job_id: egen latest_app = max(applied_date)
			format earliest_app latest_app %d

			* posting year = calendar year of the posting's first application
			gen app_year = year(earliest_app)
			* on-cycle = first application received between April 1 and August 15
			gen oncycle = (month(earliest_app)>=4 & month(earliest_app)<=7) | (month(earliest_app)==8 & day(earliest_app)<=15)

			* share of each posting's applications that arrived in its posting year.
			* Stored as double on purpose: egen's default float type holds 0.9 as
			* 0.89999998, which is below the double literal .9, so a posting with
			* exactly 90% would wrongly fail the "<.9" test below.
			gen applied_app_year = (app_year==year(applied_date))
			bys job_id: egen double frac_apply_app_year = mean(applied_app_year)

			replace base_sample = 0 if oncycle==0
			replace base_sample = 0 if frac_apply_app_year<.9

			drop frac_apply_app_year app_year applied_app_year

			* from here on app_year is the year of the individual application,
			* not the posting year used above
			gen app_year = year(applied_date)
	 

	* turn the encoded outcome back into text so it can be matched against status strings
	ren application_outcome application_outcome_numeric
	decode application_outcome_numeric, gen(application_outcome)
	drop application_outcome_numeric

	* collapse the raw statuses into broad categories; the assert at the end
	* fails if any status is not listed below
	gen application_outcome_category = ""

	replace application_outcome_category = "hired" if ///
		inlist(application_outcome, ///
			"Accepted-Pending Licensure", ///
			"Hired", ///
			"Hiring Request in Process", ///
			"Offer Accepted")

	replace application_outcome_category = "offer_declined" if ///
		inlist(application_outcome,"Offer Declined")

	replace application_outcome_category = "interview" if ///
		inlist(application_outcome, ///
			"Completed BEI Interview", ///
			"Contact for Interview", ///
			"Interview Scheduled", ///
			"Invited to Complete Virtual Interview") ///
		| inlist(application_outcome, ///
			"Invited to Interview", ///
			"Recommended for Interview (By Request)")

	replace application_outcome_category = "positive_assessment" if ///
		inlist(application_outcome, ///
			"1st Choice", ///
			"2nd Choice", ///
			"Highly Recommend for Interview", ///
			"Recommend", ///
			"Recommend for Interview") ///
		| inlist(application_outcome, ///
			"Recommendation Accepted", ///
			"Strong Candidate")

	replace application_outcome_category = "middle_assessment" if ///
		inlist(application_outcome, ///
			"Attended Info Session/Class", ///
			"Hold for Later Consideration", ///
			"Invited to Info Session/Class", ///
			"Possible recommend for interview", ///
			"Recommend with Hesitation")

	replace application_outcome_category = "neutral" if ///
		inlist(application_outcome, ///
			"Eligible Selection", ///
			"New", ///
			"Pool - Eligible", ///
			"Pool Candidate")

	replace application_outcome_category = "negative_assessment" if ///
		inlist(application_outcome, ///
			"Failed Job Questionnaire", ///
			"Incomplete Application", ///
			"Ineligible Selection", ///
			"Not Good Fit", ///
			"Not Qualified") ///
		| inlist(application_outcome, ///
			"Pool - Ineligible", ///
			"SS - INELIGIBLE", ///
			"Screened - Not Selected")

	replace application_outcome_category = "negative_later_signal" if ///
		inlist(application_outcome, ///
			"Did not complete virtual interview", ///
			"Interviewed-Not Selected", ///
			"No Call No Show", ///
			"Screened Virtual Interview - Not Selected")

	replace application_outcome_category = "withdrew" if ///
		inlist(application_outcome,"Candidate Withdrew Interest")

	assert application_outcome_category != ""

	* identifiers presumably used to link to NCERDC data (TODO confirm):
	* a constant LEA code and the remainder of school_code after dividing by 1000
	gen ncerdc_lea = "600"
	gen ncerdc_schlcode = mod(school_code,1000)

	* both key variables must be populated on every row
	count if missing(applicant_id, job_id)
	assert r(N)==0
	* prints a tabulation of fully duplicated rows; nothing is enforced here
	duplicates report // no duplicates
	sort applicant_id job_id

	compress

	save "$basedata/FOCAL_applications", replace

}
